<!DOCTYPE html>
<html lang="en-US">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width,initial-scale=1">
    <title>布隆过滤器是什么？ | JavaKeeper</title>
    <meta name="generator" content="VuePress 1.5.4">
    <link rel="icon" href="/icon.svg">
    <script>
        var _hmt = _hmt || [];
        (function() {
            var hm = document.createElement("script");
            hm.src = "https://hm.baidu.com/hm.js?a949a9b30eb86ac0159e735ff8670c03";
            var s = document.getElementsByTagName("script")[0];
            s.parentNode.insertBefore(hm, s);
            // 引入谷歌,不需要可删除这段
            var hm1 = document.createElement("script");
            hm1.src = "https://www.googletagmanager.com/gtag/js?id=UA-169923503-1";
            var s1 = document.getElementsByTagName("script")[0]; 
            s1.parentNode.insertBefore(hm1, s1);
        })();
        // 谷歌加载,不需要可删除
        window.dataLayer = window.dataLayer || [];
        function gtag(){dataLayer.push(arguments);}
        gtag('js', new Date());
        gtag('config', 'UA-169923503-1');
    </script>
    <meta name="description" content="">
    <meta name="keywords" content="JavaKeeper,Java,Java开发,算法,blog">
    <link rel="preload" href="/assets/css/0.styles.91f57736.css" as="style"><link rel="preload" href="/assets/js/app.447d4224.js" as="script"><link rel="preload" href="/assets/js/3.9d76740c.js" as="script"><link rel="preload" href="/assets/js/1.c4fd7d2e.js" as="script"><link rel="preload" href="/assets/js/50.703bffab.js" as="script"><link rel="prefetch" href="/assets/js/10.8cf3be2c.js"><link rel="prefetch" href="/assets/js/100.74f35ab8.js"><link rel="prefetch" href="/assets/js/101.7a062346.js"><link rel="prefetch" href="/assets/js/102.c9485235.js"><link rel="prefetch" href="/assets/js/103.d88a3805.js"><link rel="prefetch" href="/assets/js/104.6e034144.js"><link rel="prefetch" href="/assets/js/105.d22f7450.js"><link rel="prefetch" href="/assets/js/106.a6cb54b0.js"><link rel="prefetch" href="/assets/js/107.7b65e72b.js"><link rel="prefetch" href="/assets/js/108.eb5804bb.js"><link rel="prefetch" href="/assets/js/109.05f775e5.js"><link rel="prefetch" href="/assets/js/11.c54ae13c.js"><link rel="prefetch" href="/assets/js/110.51d3d641.js"><link rel="prefetch" href="/assets/js/111.022b64a7.js"><link rel="prefetch" href="/assets/js/112.da8afd52.js"><link rel="prefetch" href="/assets/js/113.05a17b18.js"><link rel="prefetch" href="/assets/js/114.8960d913.js"><link rel="prefetch" href="/assets/js/115.67919f68.js"><link rel="prefetch" href="/assets/js/116.62b0cd71.js"><link rel="prefetch" href="/assets/js/117.ebac3eff.js"><link rel="prefetch" href="/assets/js/118.ecd629bd.js"><link rel="prefetch" href="/assets/js/119.a09a0897.js"><link rel="prefetch" href="/assets/js/12.60aa3b24.js"><link rel="prefetch" href="/assets/js/120.bf639d3d.js"><link rel="prefetch" href="/assets/js/121.b89d0c8e.js"><link rel="prefetch" href="/assets/js/122.1a75ff83.js"><link rel="prefetch" href="/assets/js/123.d2127132.js"><link rel="prefetch" href="/assets/js/124.2caff9e0.js"><link rel="prefetch" href="/assets/js/125.9b9f966a.js"><link rel="prefetch" href="/assets/js/126.58cdfb3d.js"><link rel="prefetch" href="/assets/js/127.8ef09c53.js"><link rel="prefetch" href="/assets/js/128.efdc2ae4.js"><link rel="prefetch" href="/assets/js/129.e35cbc57.js"><link rel="prefetch" href="/assets/js/13.125c13a0.js"><link rel="prefetch" href="/assets/js/130.f01a55e3.js"><link rel="prefetch" href="/assets/js/131.65205f4a.js"><link rel="prefetch" href="/assets/js/132.f42c5a0a.js"><link rel="prefetch" href="/assets/js/133.9ba468b3.js"><link rel="prefetch" href="/assets/js/134.7b597ba9.js"><link rel="prefetch" href="/assets/js/135.fb828b9a.js"><link rel="prefetch" href="/assets/js/136.3887532f.js"><link rel="prefetch" href="/assets/js/137.549bae01.js"><link rel="prefetch" href="/assets/js/138.db8d423d.js"><link rel="prefetch" href="/assets/js/139.dbaf2267.js"><link rel="prefetch" href="/assets/js/14.bd1d0b0d.js"><link rel="prefetch" href="/assets/js/140.6cb65fdc.js"><link rel="prefetch" href="/assets/js/141.9bd6cc4b.js"><link rel="prefetch" href="/assets/js/142.552db5ed.js"><link rel="prefetch" href="/assets/js/143.2c9f2bf4.js"><link rel="prefetch" href="/assets/js/144.fba98a15.js"><link rel="prefetch" href="/assets/js/145.c42f3a21.js"><link rel="prefetch" href="/assets/js/146.596d4d33.js"><link rel="prefetch" href="/assets/js/147.c48ae5c1.js"><link rel="prefetch" href="/assets/js/148.71064871.js"><link rel="prefetch" href="/assets/js/149.16582d21.js"><link rel="prefetch" href="/assets/js/15.f247873b.js"><link rel="prefetch" href="/assets/js/150.ead09aca.js"><link rel="prefetch" href="/assets/js/151.971fdf4b.js"><link rel="prefetch" href="/assets/js/152.369c9362.js"><link rel="prefetch" href="/assets/js/153.371edd15.js"><link rel="prefetch" href="/assets/js/154.e090b491.js"><link rel="prefetch" href="/assets/js/155.c68bf602.js"><link rel="prefetch" href="/assets/js/156.304aea8d.js"><link rel="prefetch" href="/assets/js/157.83beef7f.js"><link rel="prefetch" href="/assets/js/158.bb1794b0.js"><link rel="prefetch" href="/assets/js/159.2d54e792.js"><link rel="prefetch" href="/assets/js/16.04336c71.js"><link rel="prefetch" href="/assets/js/160.99d56586.js"><link rel="prefetch" href="/assets/js/161.edf660aa.js"><link rel="prefetch" href="/assets/js/162.0b84606e.js"><link rel="prefetch" href="/assets/js/163.b59e0d60.js"><link rel="prefetch" href="/assets/js/164.d9eb8228.js"><link rel="prefetch" href="/assets/js/165.ca624c79.js"><link rel="prefetch" href="/assets/js/166.025b2ba1.js"><link rel="prefetch" href="/assets/js/167.abc982cc.js"><link rel="prefetch" href="/assets/js/168.27ca13dc.js"><link rel="prefetch" href="/assets/js/169.41e753a2.js"><link rel="prefetch" href="/assets/js/17.43b3c1c8.js"><link rel="prefetch" href="/assets/js/170.626319e1.js"><link rel="prefetch" href="/assets/js/171.a221dddf.js"><link rel="prefetch" href="/assets/js/172.464b2361.js"><link rel="prefetch" href="/assets/js/173.96a3afee.js"><link rel="prefetch" href="/assets/js/174.116607c2.js"><link rel="prefetch" href="/assets/js/175.ea3e8659.js"><link rel="prefetch" href="/assets/js/176.7d7b8afc.js"><link rel="prefetch" href="/assets/js/177.a6e00aa0.js"><link rel="prefetch" href="/assets/js/178.1f93afaf.js"><link rel="prefetch" href="/assets/js/179.3aa00dcd.js"><link rel="prefetch" href="/assets/js/18.d81b44d5.js"><link rel="prefetch" href="/assets/js/180.f8b2b75a.js"><link rel="prefetch" href="/assets/js/181.8e11258a.js"><link rel="prefetch" href="/assets/js/182.22243941.js"><link rel="prefetch" href="/assets/js/183.d051fdf6.js"><link rel="prefetch" href="/assets/js/184.a994075e.js"><link rel="prefetch" href="/assets/js/185.776c7e16.js"><link rel="prefetch" href="/assets/js/186.f1887955.js"><link rel="prefetch" href="/assets/js/187.da0d3626.js"><link rel="prefetch" href="/assets/js/188.8dfc358f.js"><link rel="prefetch" href="/assets/js/189.dcac5a59.js"><link rel="prefetch" href="/assets/js/19.1b3d66e1.js"><link rel="prefetch" href="/assets/js/190.c7e413d0.js"><link rel="prefetch" href="/assets/js/191.d9806121.js"><link rel="prefetch" href="/assets/js/192.869791da.js"><link rel="prefetch" href="/assets/js/193.2d74c4c8.js"><link rel="prefetch" href="/assets/js/194.c73a1909.js"><link rel="prefetch" href="/assets/js/195.e8c74834.js"><link rel="prefetch" href="/assets/js/20.bd5949ec.js"><link rel="prefetch" href="/assets/js/21.3fcf98cf.js"><link rel="prefetch" href="/assets/js/22.2fa1e2e8.js"><link rel="prefetch" href="/assets/js/23.1ae64bb4.js"><link rel="prefetch" href="/assets/js/24.7bdf7387.js"><link rel="prefetch" href="/assets/js/25.392c436e.js"><link rel="prefetch" href="/assets/js/26.58acbd4b.js"><link rel="prefetch" href="/assets/js/27.c725bdd5.js"><link rel="prefetch" href="/assets/js/28.6c9bda1e.js"><link rel="prefetch" href="/assets/js/29.e656b537.js"><link rel="prefetch" href="/assets/js/30.2c326fc7.js"><link rel="prefetch" href="/assets/js/31.e6c9fa30.js"><link rel="prefetch" href="/assets/js/32.c9c88437.js"><link rel="prefetch" href="/assets/js/33.0c53373c.js"><link rel="prefetch" href="/assets/js/34.9821e543.js"><link rel="prefetch" href="/assets/js/35.de8253eb.js"><link rel="prefetch" href="/assets/js/36.d182f929.js"><link rel="prefetch" href="/assets/js/37.9fa79014.js"><link rel="prefetch" href="/assets/js/38.9bebff76.js"><link rel="prefetch" href="/assets/js/39.19a3a2d4.js"><link rel="prefetch" href="/assets/js/4.564edb9d.js"><link rel="prefetch" href="/assets/js/40.cca6955f.js"><link rel="prefetch" href="/assets/js/41.854cd09a.js"><link rel="prefetch" href="/assets/js/42.ca7b612f.js"><link rel="prefetch" href="/assets/js/43.87027d58.js"><link rel="prefetch" href="/assets/js/44.8c2b4f4b.js"><link rel="prefetch" href="/assets/js/45.dffb4e08.js"><link rel="prefetch" href="/assets/js/46.f58049a5.js"><link rel="prefetch" href="/assets/js/47.6854070c.js"><link rel="prefetch" href="/assets/js/48.6cd9fa3d.js"><link rel="prefetch" href="/assets/js/49.e8861afa.js"><link rel="prefetch" href="/assets/js/5.5c31d62f.js"><link rel="prefetch" href="/assets/js/51.6655c373.js"><link rel="prefetch" href="/assets/js/52.deb2eb09.js"><link rel="prefetch" href="/assets/js/53.6e0ed77d.js"><link rel="prefetch" href="/assets/js/54.b05c58ad.js"><link rel="prefetch" href="/assets/js/55.49c8164e.js"><link rel="prefetch" href="/assets/js/56.a5574e6b.js"><link rel="prefetch" href="/assets/js/57.58cb0de4.js"><link rel="prefetch" href="/assets/js/58.52345112.js"><link rel="prefetch" href="/assets/js/59.663ce78d.js"><link rel="prefetch" href="/assets/js/6.a9df34ee.js"><link rel="prefetch" href="/assets/js/60.f06adde2.js"><link rel="prefetch" href="/assets/js/61.170255a1.js"><link rel="prefetch" href="/assets/js/62.9d120050.js"><link rel="prefetch" href="/assets/js/63.70cced6b.js"><link rel="prefetch" href="/assets/js/64.577f3548.js"><link rel="prefetch" href="/assets/js/65.c037b29d.js"><link rel="prefetch" href="/assets/js/66.7dd1045f.js"><link rel="prefetch" href="/assets/js/67.d3aa4d6c.js"><link rel="prefetch" href="/assets/js/68.526dbb61.js"><link rel="prefetch" href="/assets/js/69.58269266.js"><link rel="prefetch" href="/assets/js/7.6609d4d6.js"><link rel="prefetch" href="/assets/js/70.64108f1b.js"><link rel="prefetch" href="/assets/js/71.1e95e0a6.js"><link rel="prefetch" href="/assets/js/72.42e7ec94.js"><link rel="prefetch" href="/assets/js/73.dad4e1c5.js"><link rel="prefetch" href="/assets/js/74.28ea286a.js"><link rel="prefetch" href="/assets/js/75.dd6d4c6f.js"><link rel="prefetch" href="/assets/js/76.ca6539df.js"><link rel="prefetch" href="/assets/js/77.feb13b0e.js"><link rel="prefetch" href="/assets/js/78.321e90e6.js"><link rel="prefetch" href="/assets/js/79.68eb8fcf.js"><link rel="prefetch" href="/assets/js/8.396d51fd.js"><link rel="prefetch" href="/assets/js/80.4edb5321.js"><link rel="prefetch" href="/assets/js/81.735d7e57.js"><link rel="prefetch" href="/assets/js/82.fa120bdf.js"><link rel="prefetch" href="/assets/js/83.bf755f94.js"><link rel="prefetch" href="/assets/js/84.9b32070c.js"><link rel="prefetch" href="/assets/js/85.592aca7c.js"><link rel="prefetch" href="/assets/js/86.4dcd9e73.js"><link rel="prefetch" href="/assets/js/87.a9e546aa.js"><link rel="prefetch" href="/assets/js/88.2a423212.js"><link rel="prefetch" href="/assets/js/89.5f455115.js"><link rel="prefetch" href="/assets/js/9.adb074c6.js"><link rel="prefetch" href="/assets/js/90.5202da0a.js"><link rel="prefetch" href="/assets/js/91.02cee99d.js"><link rel="prefetch" href="/assets/js/92.f16bad0b.js"><link rel="prefetch" href="/assets/js/93.f933634f.js"><link rel="prefetch" href="/assets/js/94.8e7b1d65.js"><link rel="prefetch" href="/assets/js/95.ee0e4a0a.js"><link rel="prefetch" href="/assets/js/96.e21d78c2.js"><link rel="prefetch" href="/assets/js/97.c87e514e.js"><link rel="prefetch" href="/assets/js/98.d123ac92.js"><link rel="prefetch" href="/assets/js/99.92d1b416.js">
    <link rel="stylesheet" href="/assets/css/0.styles.91f57736.css">
  </head>
  <body>
    <div id="app" data-server-rendered="true"><div class="theme-container no-sidebar" data-v-3ba18f14><div data-v-3ba18f14><div id="loader-wrapper" class="loading-wrapper" data-v-041fef5b data-v-3ba18f14 data-v-3ba18f14><div class="loader-main" data-v-041fef5b><div data-v-041fef5b></div><div data-v-041fef5b></div><div data-v-041fef5b></div><div data-v-041fef5b></div></div> <!----> <!----></div> <div class="password-shadow password-wrapper-out" style="display:none;" data-v-68139a52 data-v-3ba18f14 data-v-3ba18f14><h3 class="title" style="display:none;" data-v-68139a52 data-v-68139a52>JavaKeeper</h3> <!----> <label id="box" class="inputBox" style="display:none;" data-v-68139a52 data-v-68139a52><input type="password" value="" data-v-68139a52> <span data-v-68139a52>Konck! Knock!</span> <button data-v-68139a52>OK</button></label> <div class="footer" style="display:none;" data-v-68139a52 data-v-68139a52><span data-v-68139a52><i class="iconfont reco-theme" data-v-68139a52></i> <a target="blank" href="https://vuepress-theme-reco.recoluan.com" data-v-68139a52>vuePress-theme-reco</a></span> <span data-v-68139a52><i class="iconfont reco-copyright" data-v-68139a52></i> <a data-v-68139a52><span data-v-68139a52>海星</span>
            
          <!---->
          2020
        </a></span></div></div> <div class="hide" data-v-3ba18f14><header class="navbar" data-v-3ba18f14><div class="sidebar-button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" role="img" viewBox="0 0 448 512" class="icon"><path fill="currentColor" d="M436 124H12c-6.627 0-12-5.373-12-12V80c0-6.627 5.373-12 12-12h424c6.627 0 12 5.373 12 12v32c0 6.627-5.373 12-12 12zm0 160H12c-6.627 0-12-5.373-12-12v-32c0-6.627 5.373-12 12-12h424c6.627 0 12 5.373 12 12v32c0 6.627-5.373 12-12 12zm0 160H12c-6.627 0-12-5.373-12-12v-32c0-6.627 5.373-12 12-12h424c6.627 0 12 5.373 12 12v32c0 6.627-5.373 12-12 12z"></path></svg></div> <a href="/" class="home-link router-link-active"><!----> <span class="site-name">JavaKeeper</span></a> <div class="links"><div class="color-picker"><a class="color-button"><i class="iconfont reco-color"></i></a> <div class="color-picker-menu" style="display:none;"><div class="mode-options"><h4 class="title">Choose mode</h4> <ul class="color-mode-options"><li class="dark">dark</li><li class="auto active">auto</li><li class="light">light</li></ul></div></div></div> <div class="search-box"><i class="iconfont reco-search"></i> <input aria-label="Search" autocomplete="off" spellcheck="false" value=""> <!----></div> <nav class="nav-links can-hide"><div class="nav-item"><a href="/java/" class="nav-link"><i class="iconfont undefined"></i>
  Java
</a></div><div class="nav-item"><a href="/data-structure-algorithms/" class="nav-link"><i class="iconfont undefined"></i>
  数据结构与算法
</a></div><div class="nav-item"><a href="/data-store/" class="nav-link router-link-active"><i class="iconfont undefined"></i>
  数据存储与缓存
</a></div><div class="nav-item"><a href="/interview/" class="nav-link"><i class="iconfont undefined"></i>
  直击面试
</a></div> <a href="https://github.com/Jstarfish/JavaKeeper" target="_blank" rel="noopener noreferrer" class="repo-link"><i class="iconfont reco-github"></i>
    GitHub
    <svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a></nav></div></header> <div class="sidebar-mask" data-v-3ba18f14></div> <aside class="sidebar" data-v-3ba18f14><div class="personal-info-wrapper" data-v-5f6acefd data-v-3ba18f14><!----> <h3 class="name" data-v-5f6acefd>
    海星
  </h3> <div class="num" data-v-5f6acefd><div data-v-5f6acefd><h3 data-v-5f6acefd>0</h3> <h6 data-v-5f6acefd>Article</h6></div> <div data-v-5f6acefd><h3 data-v-5f6acefd>0</h3> <h6 data-v-5f6acefd>Tag</h6></div></div> <hr data-v-5f6acefd></div> <nav class="nav-links"><div class="nav-item"><a href="/java/" class="nav-link"><i class="iconfont undefined"></i>
  Java
</a></div><div class="nav-item"><a href="/data-structure-algorithms/" class="nav-link"><i class="iconfont undefined"></i>
  数据结构与算法
</a></div><div class="nav-item"><a href="/data-store/" class="nav-link router-link-active"><i class="iconfont undefined"></i>
  数据存储与缓存
</a></div><div class="nav-item"><a href="/interview/" class="nav-link"><i class="iconfont undefined"></i>
  直击面试
</a></div> <a href="https://github.com/Jstarfish/JavaKeeper" target="_blank" rel="noopener noreferrer" class="repo-link"><i class="iconfont reco-github"></i>
    GitHub
    <svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a></nav> <!----> </aside> <div class="password-shadow password-wrapper-in" style="display:none;" data-v-68139a52 data-v-3ba18f14><h3 class="title" style="display:none;" data-v-68139a52 data-v-68139a52></h3> <!----> <label id="box" class="inputBox" style="display:none;" data-v-68139a52 data-v-68139a52><input type="password" value="" data-v-68139a52> <span data-v-68139a52>Konck! Knock!</span> <button data-v-68139a52>OK</button></label> <div class="footer" style="display:none;" data-v-68139a52 data-v-68139a52><span data-v-68139a52><i class="iconfont reco-theme" data-v-68139a52></i> <a target="blank" href="https://vuepress-theme-reco.recoluan.com" data-v-68139a52>vuePress-theme-reco</a></span> <span data-v-68139a52><i class="iconfont reco-copyright" data-v-68139a52></i> <a data-v-68139a52><span data-v-68139a52>海星</span>
            
          <!---->
          2020
        </a></span></div></div> <div data-v-3ba18f14><main class="page"><div class="page-title" style="display:none;"><h1 class="title">布隆过滤器是什么？</h1> <div data-v-5d8dbdb4><i class="iconfont reco-account" data-v-5d8dbdb4><span data-v-5d8dbdb4>海星</span></i> <!----> <!----> <!----></div></div> <div class="theme-reco-content content__default" style="display:none;"><h2 id="布隆过滤器是什么"><a href="#布隆过滤器是什么" class="header-anchor">#</a> 布隆过滤器是什么？</h2> <p>布隆过滤器可以理解为一个不怎么精确的 set 结构，当你使用它的 contains 方法判断某个对象是否存在时，它可能会误判。但是布隆过滤器也不是特别不精确，只要参数设置的合理，它的精确度可以控制的相对足够精确，只会有小小的误判概率。</p> <p>当布隆过滤器说某个值存在时，这个值可能不存在；当它说不存在时，那就肯定不存在。打个比方，当它说不认识你时，肯定就不认识；当它说见过你时，可能根本就没见过面，不过因为你的脸跟它认识的人中某脸比较相似 (某些熟脸的系数组合)，所以误判以前见过你。</p> <p>套在上面的使用场景中，布隆过滤器能准确过滤掉那些已经看过的内容，那些没有看过的新内容，它也会过滤掉极小一部分 (误判)，但是绝大多数新内容它都能准确识别。这样就可以完全保证推荐给用户的内容都是无重复的。</p> <h2 id="redis-中的布隆过滤器"><a href="#redis-中的布隆过滤器" class="header-anchor">#</a> Redis 中的布隆过滤器</h2> <p>Redis 官方提供的布隆过滤器到了 Redis 4.0 提供了插件功能之后才正式登场。布隆过滤器作为一个插件加载到 Redis Server 中，给 Redis 提供了强大的布隆去重功能。</p> <p>下面我们来体验一下 Redis 4.0 的布隆过滤器，为了省去繁琐安装过程，我们直接用 Docker 吧。</p> <div class="language- extra-class"><pre class="language-text"><code>&gt; docker pull redislabs/rebloom  # 拉取镜像
&gt; docker run -p6379:6379 redislabs/rebloom  # 运行容器
&gt; redis-cli  # 连接容器中的 redis 服务
</code></pre></div><p>如果上面三条指令执行没有问题，下面就可以体验布隆过滤器了。</p> <h2 id="布隆过滤器基本使用"><a href="#布隆过滤器基本使用" class="header-anchor">#</a> 布隆过滤器基本使用</h2> <p>布隆过滤器有二个基本指令，<code>bf.add</code> 添加元素，<code>bf.exists</code> 查询元素是否存在，它的用法和 set 集合的 sadd 和 sismember 差不多。注意 <code>bf.add</code> 只能一次添加一个元素，如果想要一次添加多个，就需要用到 <code>bf.madd</code> 指令。同样如果需要一次查询多个元素是否存在，就需要用到 <code>bf.mexists</code> 指令。</p> <div class="language- extra-class"><pre class="language-text"><code>127.0.0.1:6379&gt; bf.add codehole user1
(integer) 1
127.0.0.1:6379&gt; bf.add codehole user2
(integer) 1
127.0.0.1:6379&gt; bf.add codehole user3
(integer) 1
127.0.0.1:6379&gt; bf.exists codehole user1
(integer) 1
127.0.0.1:6379&gt; bf.exists codehole user2
(integer) 1
127.0.0.1:6379&gt; bf.exists codehole user3
(integer) 1
127.0.0.1:6379&gt; bf.exists codehole user4
(integer) 0
127.0.0.1:6379&gt; bf.madd codehole user4 user5 user6
1) (integer) 1
2) (integer) 1
3) (integer) 1
127.0.0.1:6379&gt; bf.mexists codehole user4 user5 user6 user7
1) (integer) 1
2) (integer) 1
3) (integer) 1
4) (integer) 0
</code></pre></div><p>Java 客户端 Jedis-2.x 没有提供指令扩展机制，所以你无法直接使用 Jedis 来访问 Redis Module 提供的 <a href="http://bf.xxx/" target="_blank" rel="noopener noreferrer">bf.xxx<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a> 指令。RedisLabs 提供了一个单独的包 <a href="https://github.com/RedisLabs/JReBloom" target="_blank" rel="noopener noreferrer">JReBloom<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a>，但是它是基于 Jedis-3.0，Jedis-3.0 这个包目前还没有进入 release，没有进入 maven 的中央仓库，需要在 Github 上下载。在使用上很不方便，如果怕麻烦，还可以使用 <a href="https://github.com/lettuce-io/lettuce-core" target="_blank" rel="noopener noreferrer">lettuce<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a>，它是另一个 Redis 的客户端，相比 Jedis 而言，它很早就支持了指令扩展。</p> <div class="language- extra-class"><pre class="language-text"><code>public class BloomTest {

  public static void main(String[] args) {
    Client client = new Client();

    client.delete(&quot;codehole&quot;);
    for (int i = 0; i &lt; 100000; i++) {
      client.add(&quot;codehole&quot;, &quot;user&quot; + i);
      boolean ret = client.exists(&quot;codehole&quot;, &quot;user&quot; + i);
      if (!ret) {
        System.out.println(i);
        break;
      }
    }

    client.close();
  }

}
</code></pre></div><p>执行上面的代码后，你会张大了嘴巴发现居然没有输出，塞进去了 100000 个元素，还是没有误判，这是怎么回事？如果你不死心的话，可以将数字再加一个 0 试试，你会发现依然没有误判。</p> <p>原因就在于布隆过滤器对于已经见过的元素肯定不会误判，它只会误判那些没见过的元素。所以我们要稍微改一下上面的脚本，使用 bf.exists 去查找没见过的元素，看看它是不是以为自己见过了。</p> <div class="language-java extra-class"><pre class="language-java"><code><span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">BloomTest</span> <span class="token punctuation">{</span>

  <span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">void</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token class-name">String</span><span class="token punctuation">[</span><span class="token punctuation">]</span> args<span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token class-name">Client</span> client <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">Client</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>

    client<span class="token punctuation">.</span><span class="token function">delete</span><span class="token punctuation">(</span><span class="token string">&quot;codehole&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> <span class="token number">100000</span><span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
      client<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span><span class="token string">&quot;codehole&quot;</span><span class="token punctuation">,</span> <span class="token string">&quot;user&quot;</span> <span class="token operator">+</span> i<span class="token punctuation">)</span><span class="token punctuation">;</span>
      <span class="token keyword">boolean</span> ret <span class="token operator">=</span> client<span class="token punctuation">.</span><span class="token function">exists</span><span class="token punctuation">(</span><span class="token string">&quot;codehole&quot;</span><span class="token punctuation">,</span> <span class="token string">&quot;user&quot;</span> <span class="token operator">+</span> <span class="token punctuation">(</span>i <span class="token operator">+</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
      <span class="token keyword">if</span> <span class="token punctuation">(</span>ret<span class="token punctuation">)</span> <span class="token punctuation">{</span>
        <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>i<span class="token punctuation">)</span><span class="token punctuation">;</span>
        <span class="token keyword">break</span><span class="token punctuation">;</span>
      <span class="token punctuation">}</span>
    <span class="token punctuation">}</span>

    client<span class="token punctuation">.</span><span class="token function">close</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token punctuation">}</span>

<span class="token punctuation">}</span>
</code></pre></div><p>运行后，我们看到了输出是 214，也就是到第 214 的时候，它出现了误判。</p> <p>那如何来测量误判率呢？我们先随机出一堆字符串，然后切分为 2 组，将其中一组塞入布隆过滤器，然后再判断另外一组的字符串存在与否，取误判的个数和字符串总量一半的百分比作为误判率。</p> <div class="language-java extra-class"><pre class="language-java"><code><span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">BloomTest</span> <span class="token punctuation">{</span>

  <span class="token keyword">private</span> <span class="token class-name">String</span> chars<span class="token punctuation">;</span>
  <span class="token punctuation">{</span>
    <span class="token class-name">StringBuilder</span> builder <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">StringBuilder</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> <span class="token number">26</span><span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
      builder<span class="token punctuation">.</span><span class="token function">append</span><span class="token punctuation">(</span><span class="token punctuation">(</span><span class="token keyword">char</span><span class="token punctuation">)</span> <span class="token punctuation">(</span><span class="token string">'a'</span> <span class="token operator">+</span> i<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token punctuation">}</span>
    chars <span class="token operator">=</span> builder<span class="token punctuation">.</span><span class="token function">toString</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token punctuation">}</span>

  <span class="token keyword">private</span> <span class="token class-name">String</span> <span class="token function">randomString</span><span class="token punctuation">(</span><span class="token keyword">int</span> n<span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token class-name">StringBuilder</span> builder <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">StringBuilder</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> n<span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
      <span class="token keyword">int</span> idx <span class="token operator">=</span> <span class="token class-name">ThreadLocalRandom</span><span class="token punctuation">.</span><span class="token function">current</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token function">nextInt</span><span class="token punctuation">(</span>chars<span class="token punctuation">.</span><span class="token function">length</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
      builder<span class="token punctuation">.</span><span class="token function">append</span><span class="token punctuation">(</span>chars<span class="token punctuation">.</span><span class="token function">charAt</span><span class="token punctuation">(</span>idx<span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token punctuation">}</span>
    <span class="token keyword">return</span> builder<span class="token punctuation">.</span><span class="token function">toString</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token punctuation">}</span>

  <span class="token keyword">private</span> <span class="token class-name">List</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token class-name">String</span><span class="token punctuation">&gt;</span></span> <span class="token function">randomUsers</span><span class="token punctuation">(</span><span class="token keyword">int</span> n<span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token class-name">List</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token class-name">String</span><span class="token punctuation">&gt;</span></span> users <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">ArrayList</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token punctuation">&gt;</span></span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token keyword">int</span> i <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span> i <span class="token operator">&lt;</span> <span class="token number">100000</span><span class="token punctuation">;</span> i<span class="token operator">++</span><span class="token punctuation">)</span> <span class="token punctuation">{</span>
      users<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span><span class="token function">randomString</span><span class="token punctuation">(</span><span class="token number">64</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token punctuation">}</span>
    <span class="token keyword">return</span> users<span class="token punctuation">;</span>
  <span class="token punctuation">}</span>

  <span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">void</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token class-name">String</span><span class="token punctuation">[</span><span class="token punctuation">]</span> args<span class="token punctuation">)</span> <span class="token punctuation">{</span>
    <span class="token class-name">BloomTest</span> bloomer <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">BloomTest</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token class-name">List</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token class-name">String</span><span class="token punctuation">&gt;</span></span> users <span class="token operator">=</span> bloomer<span class="token punctuation">.</span><span class="token function">randomUsers</span><span class="token punctuation">(</span><span class="token number">100000</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token class-name">List</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token class-name">String</span><span class="token punctuation">&gt;</span></span> usersTrain <span class="token operator">=</span> users<span class="token punctuation">.</span><span class="token function">subList</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> users<span class="token punctuation">.</span><span class="token function">size</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">/</span> <span class="token number">2</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token class-name">List</span><span class="token generics"><span class="token punctuation">&lt;</span><span class="token class-name">String</span><span class="token punctuation">&gt;</span></span> usersTest <span class="token operator">=</span> users<span class="token punctuation">.</span><span class="token function">subList</span><span class="token punctuation">(</span>users<span class="token punctuation">.</span><span class="token function">size</span><span class="token punctuation">(</span><span class="token punctuation">)</span> <span class="token operator">/</span> <span class="token number">2</span><span class="token punctuation">,</span> users<span class="token punctuation">.</span><span class="token function">size</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>

    <span class="token class-name">Client</span> client <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">Client</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    client<span class="token punctuation">.</span><span class="token function">delete</span><span class="token punctuation">(</span><span class="token string">&quot;codehole&quot;</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token class-name">String</span> user <span class="token operator">:</span> usersTrain<span class="token punctuation">)</span> <span class="token punctuation">{</span>
      client<span class="token punctuation">.</span><span class="token function">add</span><span class="token punctuation">(</span><span class="token string">&quot;codehole&quot;</span><span class="token punctuation">,</span> user<span class="token punctuation">)</span><span class="token punctuation">;</span>
    <span class="token punctuation">}</span>
    <span class="token keyword">int</span> falses <span class="token operator">=</span> <span class="token number">0</span><span class="token punctuation">;</span>
    <span class="token keyword">for</span> <span class="token punctuation">(</span><span class="token class-name">String</span> user <span class="token operator">:</span> usersTest<span class="token punctuation">)</span> <span class="token punctuation">{</span>
      <span class="token keyword">boolean</span> ret <span class="token operator">=</span> client<span class="token punctuation">.</span><span class="token function">exists</span><span class="token punctuation">(</span><span class="token string">&quot;codehole&quot;</span><span class="token punctuation">,</span> user<span class="token punctuation">)</span><span class="token punctuation">;</span>
      <span class="token keyword">if</span> <span class="token punctuation">(</span>ret<span class="token punctuation">)</span> <span class="token punctuation">{</span>
        falses<span class="token operator">++</span><span class="token punctuation">;</span>
      <span class="token punctuation">}</span>
    <span class="token punctuation">}</span>
    <span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">printf</span><span class="token punctuation">(</span><span class="token string">&quot;%d %d\n&quot;</span><span class="token punctuation">,</span> falses<span class="token punctuation">,</span> usersTest<span class="token punctuation">.</span><span class="token function">size</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
    client<span class="token punctuation">.</span><span class="token function">close</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
  <span class="token punctuation">}</span>

<span class="token punctuation">}</span>
</code></pre></div><p>运行一下，等待大约一分钟，输出:</p> <div class="language- extra-class"><pre class="language-text"><code>total users 100000
all trained
628 50000
</code></pre></div><p>可以看到误判率大约 1% 多点。你也许会问这个误判率还是有点高啊，有没有办法降低一点？答案是有的。</p> <p>我们上面使用的布隆过滤器只是默认参数的布隆过滤器，它在我们第一次 add 的时候自动创建。Redis 其实还提供了自定义参数的布隆过滤器，需要我们在 add 之前使用<code>bf.reserve</code>指令显式创建。如果对应的 key 已经存在，<code>bf.reserve</code>会报错。<code>bf.reserve</code>有三个参数，分别是 key, <code>error_rate</code>和<code>initial_size</code>。错误率越低，需要的空间越大。<code>initial_size</code>参数表示预计放入的元素数量，当实际数量超出这个数值时，误判率会上升。</p> <p>所以需要提前设置一个较大的数值避免超出导致误判率升高。如果不使用 bf.reserve，默认的<code>error_rate</code>是 0.01，默认的<code>initial_size</code>是 100。</p> <p>接下来我们使用 bf.reserve 改造一下上面的脚本：</p> <p>Java 版本：</p> <div class="language- extra-class"><pre class="language-text"><code>public class BloomTest {

  private String chars;
  {
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i &lt; 26; i++) {
      builder.append((char) ('a' + i));
    }
    chars = builder.toString();
  }

  private String randomString(int n) {
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i &lt; n; i++) {
      int idx = ThreadLocalRandom.current().nextInt(chars.length());
      builder.append(chars.charAt(idx));
    }
    return builder.toString();
  }

  private List&lt;String&gt; randomUsers(int n) {
    List&lt;String&gt; users = new ArrayList&lt;&gt;();
    for (int i = 0; i &lt; 100000; i++) {
      users.add(randomString(64));
    }
    return users;
  }

  public static void main(String[] args) {
    BloomTest bloomer = new BloomTest();
    List&lt;String&gt; users = bloomer.randomUsers(100000);
    List&lt;String&gt; usersTrain = users.subList(0, users.size() / 2);
    List&lt;String&gt; usersTest = users.subList(users.size() / 2, users.size());

    Client client = new Client();
    client.delete(&quot;codehole&quot;);
    // 对应 bf.reserve 指令
    client.createFilter(&quot;codehole&quot;, 50000, 0.001);
    for (String user : usersTrain) {
      client.add(&quot;codehole&quot;, user);
    }
    int falses = 0;
    for (String user : usersTest) {
      boolean ret = client.exists(&quot;codehole&quot;, user);
      if (ret) {
        falses++;
      }
    }
    System.out.printf(&quot;%d %d\n&quot;, falses, usersTest.size());
    client.close();
  }

}
</code></pre></div><p>运行一下，等待约 1 分钟，输出如下：</p> <div class="language- extra-class"><pre class="language-text"><code>total users 100000
all trained
6 50000
</code></pre></div><p>我们看到了误判率大约 0.012%，比预计的 0.1% 低很多，不过布隆的概率是有误差的，只要不比预计误判率高太多，都是正常现象。</p> <h2 id="注意事项"><a href="#注意事项" class="header-anchor">#</a> 注意事项</h2> <p>布隆过滤器的<code>initial_size</code>估计的过大，会浪费存储空间，估计的过小，就会影响准确率，用户在使用之前一定要尽可能地精确估计好元素数量，还需要加上一定的冗余空间以避免实际元素可能会意外高出估计值很多。</p> <p>布隆过滤器的<code>error_rate</code>越小，需要的存储空间就越大，对于不需要过于精确的场合，<code>error_rate</code>设置稍大一点也无伤大雅。比如在新闻去重上而言，误判率高一点只会让小部分文章不能让合适的人看到，文章的整体阅读量不会因为这点误判率就带来巨大的改变。</p> <h2 id="布隆过滤器的原理"><a href="#布隆过滤器的原理" class="header-anchor">#</a> 布隆过滤器的原理</h2> <p>学会了布隆过滤器的使用，下面有必要把原理解释一下，不然读者还会继续蒙在鼓里</p> <p><img src="https://user-gold-cdn.xitu.io/2018/7/4/16464301a0e26416?imageView2/0/w/1280/h/960/format/webp/ignore-error/1" alt="img"></p> <p>每个布隆过滤器对应到 Redis 的数据结构里面就是一个大型的位数组和几个不一样的无偏 hash 函数。所谓无偏就是能够把元素的 hash 值算得比较均匀。</p> <p>向布隆过滤器中添加 key 时，会使用多个 hash 函数对 key 进行 hash 算得一个整数索引值然后对位数组长度进行取模运算得到一个位置，每个 hash 函数都会算得一个不同的位置。再把位数组的这几个位置都置为 1 就完成了 add 操作。</p> <p>向布隆过滤器询问 key 是否存在时，跟 add 一样，也会把 hash 的几个位置都算出来，看看位数组中这几个位置是否都为 1，只要有一个位为 0，那么说明布隆过滤器中这个 key 不存在。如果都是 1，这并不能说明这个 key 就一定存在，只是极有可能存在，因为这些位被置为 1 可能是因为其它的 key 存在所致。如果这个位数组比较稀疏，判断正确的概率就会很大，如果这个位数组比较拥挤，判断正确的概率就会降低。具体的概率计算公式比较复杂，感兴趣可以阅读扩展阅读，非常烧脑，不建议读者细看。</p> <p>使用时不要让实际元素远大于初始化大小，当实际元素开始超出初始化大小时，应该对布隆过滤器进行重建，重新分配一个 size 更大的过滤器，再将所有的历史元素批量 add 进去 (这就要求我们在其它的存储器中记录所有的历史元素)。因为 error_rate 不会因为数量超出就急剧增加，这就给我们重建过滤器提供了较为宽松的时间。</p> <h2 id="空间占用估计"><a href="#空间占用估计" class="header-anchor">#</a> 空间占用估计</h2> <p>布隆过滤器的空间占用有一个简单的计算公式，但是推导比较繁琐，这里就省去推导过程了，直接引出计算公式，感兴趣的读者可以点击「扩展阅读」深入理解公式的推导过程。</p> <p>布隆过滤器有两个参数，第一个是预计元素的数量 n，第二个是错误率 f。公式根据这两个输入得到两个输出，第一个输出是位数组的长度 l，也就是需要的存储空间大小 (bit)，第二个输出是 hash 函数的最佳数量 k。hash 函数的数量也会直接影响到错误率，最佳的数量会有最低的错误率。</p> <div class="language- extra-class"><pre class="language-text"><code>k=0.7*(l/n)  # 约等于
f=0.6185^(l/n)  # ^ 表示次方计算，也就是 math.pow
</code></pre></div><p>从公式中可以看出</p> <ol><li>位数组相对越长 (l/n)，错误率 f 越低，这个和直观上理解是一致的</li> <li>位数组相对越长 (l/n)，hash 函数需要的最佳数量也越多，影响计算效率</li> <li>当一个元素平均需要 1 个字节 (8bit) 的指纹空间时 (l/n=8)，错误率大约为 2%</li> <li>错误率为 10%，一个元素需要的平均指纹空间为 4.792 个 bit，大约为 5bit</li> <li>错误率为 1%，一个元素需要的平均指纹空间为 9.585 个 bit，大约为 10bit</li> <li>错误率为 0.1%，一个元素需要的平均指纹空间为 14.377 个 bit，大约为 15bit</li></ol> <p>你也许会想，如果一个元素需要占据 15 个 bit，那相对 set 集合的空间优势是不是就没有那么明显了？这里需要明确的是，set 中会存储每个元素的内容，而布隆过滤器仅仅存储元素的指纹。元素的内容大小就是字符串的长度，它一般会有多个字节，甚至是几十个上百个字节，每个元素本身还需要一个指针被 set 集合来引用，这个指针又会占去 4 个字节或 8 个字节，取决于系统是 32bit 还是 64bit。而指纹空间只有接近 2 个字节，所以布隆过滤器的空间优势还是非常明显的。</p> <p>如果读者觉得公式计算起来太麻烦，也没有关系，有很多现成的网站已经支持计算空间占用的功能了，我们只要把参数输进去，就可以直接看到结果，比如 <a href="https://krisives.github.io/bloom-calculator/" target="_blank" rel="noopener noreferrer">布隆计算器<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a>。</p> <p><img src="https://user-gold-cdn.xitu.io/2018/7/4/16464885cf1f74c2?imageView2/0/w/1280/h/960/format/webp/ignore-error/1" alt="img"></p> <h2 id="实际元素超出时-误判率会怎样变化"><a href="#实际元素超出时-误判率会怎样变化" class="header-anchor">#</a> 实际元素超出时，误判率会怎样变化</h2> <p>当实际元素超出预计元素时，错误率会有多大变化，它会急剧上升么，还是平缓地上升，这就需要另外一个公式，引入参数 t 表示实际元素和预计元素的倍数 t</p> <div class="language- extra-class"><pre class="language-text"><code>f=(1-0.5^t)^k  # 极限近似，k 是 hash 函数的最佳数量
</code></pre></div><p>当 t 增大时，错误率，f 也会跟着增大，分别选择错误率为 10%,1%,0.1% 的 k 值，画出它的曲线进行直观观察</p> <p><img src="https://user-gold-cdn.xitu.io/2018/7/5/164685454156e8e2?imageView2/0/w/1280/h/960/format/webp/ignore-error/1" alt="img"></p> <p>从这个图中可以看出曲线还是比较陡峭的</p> <ol><li>错误率为 10% 时，倍数比为 2 时，错误率就会升至接近 40%，这个就比较危险了</li> <li>错误率为 1% 时，倍数比为 2 时，错误率升至 15%，也挺可怕的</li> <li>错误率为 0.1%，倍数比为 2 时，错误率升至 5%，也比较悬了</li></ol> <h2 id="用不上-redis4-0-怎么办"><a href="#用不上-redis4-0-怎么办" class="header-anchor">#</a> 用不上 Redis4.0 怎么办？</h2> <p>Redis 4.0 之前也有第三方的布隆过滤器 lib 使用，只不过在实现上使用 redis 的位图来实现的，性能上也要差不少。比如一次 exists 查询会涉及到多次 getbit 操作，网络开销相比而言会高出不少。另外在实现上这些第三方 lib 也不尽完美，比如 pyrebloom 库就不支持重连和重试，在使用时需要对它做一层封装后才能在生产环境中使用。</p> <ol><li><a href="https://github.com/robinhoodmarkets/pyreBloom" target="_blank" rel="noopener noreferrer">Python Redis Bloom Filter<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a></li> <li><a href="https://github.com/Baqend/Orestes-Bloomfilter" target="_blank" rel="noopener noreferrer">Java Redis Bloom Filter<svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15" class="icon outbound"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path> <polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg></a></li></ol> <h2 id="布隆过滤器的其它应用"><a href="#布隆过滤器的其它应用" class="header-anchor">#</a> 布隆过滤器的其它应用</h2> <p>在爬虫系统中，我们需要对 URL 进行去重，已经爬过的网页就可以不用爬了。但是 URL 太多了，几千万几个亿，如果用一个集合装下这些 URL 地址那是非常浪费空间的。这时候就可以考虑使用布隆过滤器。它可以大幅降低去重存储消耗，只不过也会使得爬虫系统错过少量的页面。</p> <p>布隆过滤器在 NoSQL 数据库领域使用非常广泛，我们平时用到的 HBase、Cassandra 还有 LevelDB、RocksDB 内部都有布隆过滤器结构，布隆过滤器可以显著降低数据库的 IO 请求数量。当用户来查询某个 row 时，可以先通过内存中的布隆过滤器过滤掉大量不存在的 row 请求，然后再去磁盘进行查询。</p> <p>邮箱系统的垃圾邮件过滤功能也普遍用到了布隆过滤器，因为用了这个过滤器，所以平时也会遇到某些正常的邮件被放进了垃圾邮件目录中，这个就是误判所致，概率很低。</p></div> <footer class="page-edit" style="display:none;"><!----> <!----></footer> <!----> <!----> <!----></main> <!----></div></div></div></div><div class="global-ui"><div class="back-to-ceiling" style="right:1rem;bottom:6rem;width:2.5rem;height:2.5rem;border-radius:.25rem;line-height:2.5rem;display:none;" data-v-db14854a data-v-db14854a><svg t="1574745035067" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5404" class="icon" data-v-db14854a><path d="M526.60727968 10.90185116a27.675 27.675 0 0 0-29.21455937 0c-131.36607665 82.28402758-218.69155461 228.01873535-218.69155402 394.07834331a462.20625001 462.20625001 0 0 0 5.36959153 69.94390903c1.00431239 6.55289093-0.34802892 13.13561351-3.76865779 18.80351572-32.63518765 54.11355614-51.75690182 118.55860487-51.7569018 187.94566865a371.06718723 371.06718723 0 0 0 11.50484808 91.98906777c6.53300375 25.50556257 41.68394495 28.14064038 52.69160883 4.22606766 17.37162448-37.73630017 42.14135425-72.50938081 72.80769204-103.21549295 2.18761121 3.04276886 4.15646224 6.24463696 6.40373557 9.22774369a1871.4375 1871.4375 0 0 0 140.04691725 5.34970492 1866.36093723 1866.36093723 0 0 0 140.04691723-5.34970492c2.24727335-2.98310674 4.21612437-6.18497483 6.3937923-9.2178004 30.66633723 30.70611158 55.4360664 65.4791928 72.80769147 103.21549355 11.00766384 23.91457269 46.15860503 21.27949489 52.69160879-4.22606768a371.15156223 371.15156223 0 0 0 11.514792-91.99901164c0-69.36717486-19.13165746-133.82216804-51.75690182-187.92578088-3.42062944-5.66790279-4.76302748-12.26056868-3.76865837-18.80351632a462.20625001 462.20625001 0 0 0 5.36959269-69.943909c-0.00994388-166.08943902-87.32547796-311.81420293-218.6915546-394.09823051zM605.93803103 357.87693858a93.93749974 93.93749974 0 1 1-187.89594924 6.1e-7 93.93749974 93.93749974 0 0 1 187.89594924-6.1e-7z" p-id="5405" data-v-db14854a></path><path d="M429.50777625 765.63860547C429.50777625 803.39355007 466.44236686 1000.39046097 512.00932183 1000.39046097c45.56695499 0 82.4922232-197.00623328 82.5015456-234.7518555 0-37.75494459-36.9345906-68.35043303-82.4922232-68.34111062-45.57627738-0.00932239-82.52019037 30.59548842-82.51086798 68.34111062z" p-id="5406" data-v-db14854a></path></svg></div><!----></div></div>
    <script src="/assets/js/app.447d4224.js" defer></script><script src="/assets/js/3.9d76740c.js" defer></script><script src="/assets/js/1.c4fd7d2e.js" defer></script><script src="/assets/js/50.703bffab.js" defer></script>
  </body>
</html>
